home *** CD-ROM | disk | FTP | other *** search
/ PCGUIA 127 / PC Guia 127.iso / Software / Produtividade / OpenOffice.org 2.0.1 / openofficeorg4.cab / test_codeccallbacks.py < prev    next >
Text File  |  2005-11-19  |  28KB  |  710 lines

  1. import test.test_support, unittest
  2. import sys, codecs, htmlentitydefs, unicodedata
  3.  
  4. class PosReturn:
  5.     # this can be used for configurable callbacks
  6.  
  7.     def __init__(self):
  8.         self.pos = 0
  9.  
  10.     def handle(self, exc):
  11.         oldpos = self.pos
  12.         realpos = oldpos
  13.         if realpos<0:
  14.             realpos = len(exc.object) + realpos
  15.         # if we don't advance this time, terminate on the next call
  16.         # otherwise we'd get an endless loop
  17.         if realpos <= exc.start:
  18.             self.pos = len(exc.object)
  19.         return (u"<?>", oldpos)
  20.  
  21. class CodecCallbackTest(unittest.TestCase):
  22.  
  23.     def test_xmlcharrefreplace(self):
  24.         # replace unencodable characters which numeric character entities.
  25.         # For ascii, latin-1 and charmaps this is completely implemented
  26.         # in C and should be reasonably fast.
  27.         s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
  28.         self.assertEqual(
  29.             s.encode("ascii", "xmlcharrefreplace"),
  30.             "スパモ änd eggs"
  31.         )
  32.         self.assertEqual(
  33.             s.encode("latin-1", "xmlcharrefreplace"),
  34.             "スパモ \xe4nd eggs"
  35.         )
  36.  
  37.     def test_xmlcharnamereplace(self):
  38.         # This time use a named character entity for unencodable
  39.         # characters, if one is available.
  40.  
  41.         def xmlcharnamereplace(exc):
  42.             if not isinstance(exc, UnicodeEncodeError):
  43.                 raise TypeError("don't know how to handle %r" % exc)
  44.             l = []
  45.             for c in exc.object[exc.start:exc.end]:
  46.                 try:
  47.                     l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
  48.                 except KeyError:
  49.                     l.append(u"&#%d;" % ord(c))
  50.             return (u"".join(l), exc.end)
  51.  
  52.         codecs.register_error(
  53.             "test.xmlcharnamereplace", xmlcharnamereplace)
  54.  
  55.         sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
  56.         sout = "«ℜ» = ⟨ሴ€⟩"
  57.         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
  58.         sout = "\xabℜ\xbb = ⟨ሴ€⟩"
  59.         self.assertEqual(sin.encode("latin-1", "test.xmlcharnamereplace"), sout)
  60.         sout = "\xabℜ\xbb = ⟨ሴ\xa4⟩"
  61.         self.assertEqual(sin.encode("iso-8859-15", "test.xmlcharnamereplace"), sout)
  62.  
  63.     def test_uninamereplace(self):
  64.         # We're using the names from the unicode database this time,
  65.         # and we're doing "syntax highlighting" here, i.e. we include
  66.         # the replaced text in ANSI escape sequences. For this it is
  67.         # useful that the error handler is not called for every single
  68.         # unencodable character, but for a complete sequence of
  69.         # unencodable characters, otherwise we would output many
  70.         # unneccessary escape sequences.
  71.  
  72.         def uninamereplace(exc):
  73.             if not isinstance(exc, UnicodeEncodeError):
  74.                 raise TypeError("don't know how to handle %r" % exc)
  75.             l = []
  76.             for c in exc.object[exc.start:exc.end]:
  77.                 l.append(unicodedata.name(c, u"0x%x" % ord(c)))
  78.             return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
  79.  
  80.         codecs.register_error(
  81.             "test.uninamereplace", uninamereplace)
  82.  
  83.         sin = u"\xac\u1234\u20ac\u8000"
  84.         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
  85.         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
  86.  
  87.         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
  88.         self.assertEqual(sin.encode("latin-1", "test.uninamereplace"), sout)
  89.  
  90.         sout = "\xac\033[1mETHIOPIC SYLLABLE SEE\033[0m\xa4\033[1mCJK UNIFIED IDEOGRAPH-8000\033[0m"
  91.         self.assertEqual(sin.encode("iso-8859-15", "test.uninamereplace"), sout)
  92.  
  93.     def test_backslashescape(self):
  94.         # Does the same as the "unicode-escape" encoding, but with different
  95.         # base encodings.
  96.         sin = u"a\xac\u1234\u20ac\u8000"
  97.         if sys.maxunicode > 0xffff:
  98.             sin += unichr(sys.maxunicode)
  99.         sout = "a\\xac\\u1234\\u20ac\\u8000"
  100.         if sys.maxunicode > 0xffff:
  101.             sout += "\\U%08x" % sys.maxunicode
  102.         self.assertEqual(sin.encode("ascii", "backslashreplace"), sout)
  103.  
  104.         sout = "a\xac\\u1234\\u20ac\\u8000"
  105.         if sys.maxunicode > 0xffff:
  106.             sout += "\\U%08x" % sys.maxunicode
  107.         self.assertEqual(sin.encode("latin-1", "backslashreplace"), sout)
  108.  
  109.         sout = "a\xac\\u1234\xa4\\u8000"
  110.         if sys.maxunicode > 0xffff:
  111.             sout += "\\U%08x" % sys.maxunicode
  112.         self.assertEqual(sin.encode("iso-8859-15", "backslashreplace"), sout)
  113.  
  114.     def test_relaxedutf8(self):
  115.         # This is the test for a decoding callback handler,
  116.         # that relaxes the UTF-8 minimal encoding restriction.
  117.         # A null byte that is encoded as "\xc0\x80" will be
  118.         # decoded as a null byte. All other illegal sequences
  119.         # will be handled strictly.
  120.         def relaxedutf8(exc):
  121.             if not isinstance(exc, UnicodeDecodeError):
  122.                 raise TypeError("don't know how to handle %r" % exc)
  123.             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
  124.                 return (u"\x00", exc.start+2) # retry after two bytes
  125.             else:
  126.                 raise exc
  127.  
  128.         codecs.register_error(
  129.             "test.relaxedutf8", relaxedutf8)
  130.  
  131.         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
  132.         sout = u"a\x00b\x00c\xfc\x00\x00"
  133.         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
  134.         sin = "\xc0\x80\xc0\x81"
  135.         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
  136.  
  137.     def test_charmapencode(self):
  138.         # For charmap encodings the replacement string will be
  139.         # mapped through the encoding again. This means, that
  140.         # to be able to use e.g. the "replace" handler, the
  141.         # charmap has to have a mapping for "?".
  142.         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
  143.         sin = u"abc"
  144.         sout = "AABBCC"
  145.         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
  146.  
  147.         sin = u"abcA"
  148.         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
  149.  
  150.         charmap[ord("?")] = "XYZ"
  151.         sin = u"abcDEF"
  152.         sout = "AABBCCXYZXYZXYZ"
  153.         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
  154.  
  155.         charmap[ord("?")] = u"XYZ"
  156.         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
  157.  
  158.         charmap[ord("?")] = u"XYZ"
  159.         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
  160.  
    def test_callbacks(self):
        # Register two custom handlers and check the replacement text and
        # resume position they produce for decoding and encoding errors.

        # handler1 accepts encode and decode errors; it renders each
        # offending item as "<ordinal>", wraps the run in "[...]" and
        # resumes at exc.end.
        def handler1(exc):
            if not isinstance(exc, UnicodeEncodeError) \
               and not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
            return (u"[%s]" % u"".join(l), exc.end)

        codecs.register_error("test.handler1", handler1)

        # handler2 accepts decode errors only and resumes one position past
        # exc.end, so the item following each error is dropped.
        def handler2(exc):
            if not isinstance(exc, UnicodeDecodeError):
                raise TypeError("don't know how to handle %r" % exc)
            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character

        codecs.register_error("test.handler2", handler2)

        s = "\x00\x81\x7f\x80\xff"

        self.assertEqual(
            s.decode("ascii", "test.handler1"),
            u"\x00[<129>]\x7f[<128>][<255>]"
        )
        # With handler2 the "\x7f" and the "\xff" are skipped.
        self.assertEqual(
            s.decode("ascii", "test.handler2"),
            u"\x00[<129>][<128>]"
        )

        # Truncated \u escapes: the handler sees the backslash, the "u" and
        # the characters consumed so far.
        self.assertEqual(
            "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
            u"\u3042[<92><117><51><120>]xx"
        )

        self.assertEqual(
            "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
            u"\u3042[<92><117><51><120><120>]"
        )

        # Charmap decoding: bytes without a mapping go through the handler.
        self.assertEqual(
            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
            u"z[<98>][<99>]"
        )

        # Encoding: the two adjacent unencodable characters are passed to
        # the handler as a single run.
        self.assertEqual(
            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
            u"g[<252><223>]rk"
        )

        self.assertEqual(
            u"g\xfc\xdf".encode("ascii", "test.handler1"),
            u"g[<252><223>]"
        )
  214.  
  215.     def test_longstrings(self):
  216.         # test long strings to check for memory overflow problems
  217.         errors = [ "strict", "ignore", "replace", "xmlcharrefreplace", "backslashreplace"]
  218.         # register the handlers under different names,
  219.         # to prevent the codec from recognizing the name
  220.         for err in errors:
  221.             codecs.register_error("test." + err, codecs.lookup_error(err))
  222.         l = 1000
  223.         errors += [ "test." + err for err in errors ]
  224.         for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
  225.             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
  226.                 for err in errors:
  227.                     try:
  228.                         uni.encode(enc, err)
  229.                     except UnicodeError:
  230.                         pass
  231.  
  232.     def check_exceptionobjectargs(self, exctype, args, msg):
  233.         # Test UnicodeError subclasses: construction, attribute assignment and __str__ conversion
  234.         # check with one missing argument
  235.         self.assertRaises(TypeError, exctype, *args[:-1])
  236.         # check with one argument too much
  237.         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
  238.         # check with one argument of the wrong type
  239.         wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
  240.         for i in xrange(len(args)):
  241.             for wrongarg in wrongargs:
  242.                 if type(wrongarg) is type(args[i]):
  243.                     continue
  244.                 # build argument array
  245.                 callargs = []
  246.                 for j in xrange(len(args)):
  247.                     if i==j:
  248.                         callargs.append(wrongarg)
  249.                     else:
  250.                         callargs.append(args[i])
  251.                 self.assertRaises(TypeError, exctype, *callargs)
  252.  
  253.         # check with the correct number and type of arguments
  254.         exc = exctype(*args)
  255.         self.assertEquals(str(exc), msg)
  256.  
  257.     def test_unicodeencodeerror(self):
  258.         self.check_exceptionobjectargs(
  259.             UnicodeEncodeError,
  260.             ["ascii", u"g\xfcrk", 1, 2, "ouch"],
  261.             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
  262.         )
  263.         self.check_exceptionobjectargs(
  264.             UnicodeEncodeError,
  265.             ["ascii", u"g\xfcrk", 1, 4, "ouch"],
  266.             "'ascii' codec can't encode characters in position 1-3: ouch"
  267.         )
  268.         self.check_exceptionobjectargs(
  269.             UnicodeEncodeError,
  270.             ["ascii", u"\xfcx", 0, 1, "ouch"],
  271.             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
  272.         )
  273.         self.check_exceptionobjectargs(
  274.             UnicodeEncodeError,
  275.             ["ascii", u"\u0100x", 0, 1, "ouch"],
  276.             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
  277.         )
  278.         self.check_exceptionobjectargs(
  279.             UnicodeEncodeError,
  280.             ["ascii", u"\uffffx", 0, 1, "ouch"],
  281.             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
  282.         )
  283.         if sys.maxunicode > 0xffff:
  284.             self.check_exceptionobjectargs(
  285.                 UnicodeEncodeError,
  286.                 ["ascii", u"\U00010000x", 0, 1, "ouch"],
  287.                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
  288.             )
  289.  
  290.     def test_unicodedecodeerror(self):
  291.         self.check_exceptionobjectargs(
  292.             UnicodeDecodeError,
  293.             ["ascii", "g\xfcrk", 1, 2, "ouch"],
  294.             "'ascii' codec can't decode byte 0xfc in position 1: ouch"
  295.         )
  296.         self.check_exceptionobjectargs(
  297.             UnicodeDecodeError,
  298.             ["ascii", "g\xfcrk", 1, 3, "ouch"],
  299.             "'ascii' codec can't decode bytes in position 1-2: ouch"
  300.         )
  301.  
  302.     def test_unicodetranslateerror(self):
  303.         self.check_exceptionobjectargs(
  304.             UnicodeTranslateError,
  305.             [u"g\xfcrk", 1, 2, "ouch"],
  306.             "can't translate character u'\\xfc' in position 1: ouch"
  307.         )
  308.         self.check_exceptionobjectargs(
  309.             UnicodeTranslateError,
  310.             [u"g\u0100rk", 1, 2, "ouch"],
  311.             "can't translate character u'\\u0100' in position 1: ouch"
  312.         )
  313.         self.check_exceptionobjectargs(
  314.             UnicodeTranslateError,
  315.             [u"g\uffffrk", 1, 2, "ouch"],
  316.             "can't translate character u'\\uffff' in position 1: ouch"
  317.         )
  318.         if sys.maxunicode > 0xffff:
  319.             self.check_exceptionobjectargs(
  320.                 UnicodeTranslateError,
  321.                 [u"g\U00010000rk", 1, 2, "ouch"],
  322.                 "can't translate character u'\\U00010000' in position 1: ouch"
  323.             )
  324.         self.check_exceptionobjectargs(
  325.             UnicodeTranslateError,
  326.             [u"g\xfcrk", 1, 3, "ouch"],
  327.             "can't translate characters in position 1-2: ouch"
  328.         )
  329.  
  330.     def test_badandgoodstrictexceptions(self):
  331.         # "strict" complains about a non-exception passed in
  332.         self.assertRaises(
  333.             TypeError,
  334.             codecs.strict_errors,
  335.             42
  336.         )
  337.         # "strict" complains about the wrong exception type
  338.         self.assertRaises(
  339.             Exception,
  340.             codecs.strict_errors,
  341.             Exception("ouch")
  342.         )
  343.  
  344.         # If the correct exception is passed in, "strict" raises it
  345.         self.assertRaises(
  346.             UnicodeEncodeError,
  347.             codecs.strict_errors,
  348.             UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
  349.         )
  350.  
  351.     def test_badandgoodignoreexceptions(self):
  352.         # "ignore" complains about a non-exception passed in
  353.         self.assertRaises(
  354.            TypeError,
  355.            codecs.ignore_errors,
  356.            42
  357.         )
  358.         # "ignore" complains about the wrong exception type
  359.         self.assertRaises(
  360.            TypeError,
  361.            codecs.ignore_errors,
  362.            UnicodeError("ouch")
  363.         )
  364.         # If the correct exception is passed in, "ignore" returns an empty replacement
  365.         self.assertEquals(
  366.             codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  367.             (u"", 1)
  368.         )
  369.         self.assertEquals(
  370.             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
  371.             (u"", 1)
  372.         )
  373.         self.assertEquals(
  374.             codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
  375.             (u"", 1)
  376.         )
  377.  
  378.     def test_badandgoodreplaceexceptions(self):
  379.         # "replace" complains about a non-exception passed in
  380.         self.assertRaises(
  381.            TypeError,
  382.            codecs.replace_errors,
  383.            42
  384.         )
  385.         # "replace" complains about the wrong exception type
  386.         self.assertRaises(
  387.            TypeError,
  388.            codecs.replace_errors,
  389.            UnicodeError("ouch")
  390.         )
  391.         # With the correct exception, "ignore" returns an empty replacement
  392.         self.assertEquals(
  393.             codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  394.             (u"?", 1)
  395.         )
  396.         self.assertEquals(
  397.             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
  398.             (u"\ufffd", 1)
  399.         )
  400.         self.assertEquals(
  401.             codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
  402.             (u"\ufffd", 1)
  403.         )
  404.  
  405.     def test_badandgoodxmlcharrefreplaceexceptions(self):
  406.         # "xmlcharrefreplace" complains about a non-exception passed in
  407.         self.assertRaises(
  408.            TypeError,
  409.            codecs.xmlcharrefreplace_errors,
  410.            42
  411.         )
  412.         # "xmlcharrefreplace" complains about the wrong exception types
  413.         self.assertRaises(
  414.            TypeError,
  415.            codecs.xmlcharrefreplace_errors,
  416.            UnicodeError("ouch")
  417.         )
  418.         # "xmlcharrefreplace" can only be used for encoding
  419.         self.assertRaises(
  420.             TypeError,
  421.             codecs.xmlcharrefreplace_errors,
  422.             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
  423.         )
  424.         self.assertRaises(
  425.             TypeError,
  426.             codecs.xmlcharrefreplace_errors,
  427.             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
  428.         )
  429.         # Use the correct exception
  430.         self.assertEquals(
  431.             codecs.xmlcharrefreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  432.             (u"&#%d;" % 0x3042, 1)
  433.         )
  434.  
  435.     def test_badandgoodbackslashreplaceexceptions(self):
  436.         # "backslashreplace" complains about a non-exception passed in
  437.         self.assertRaises(
  438.            TypeError,
  439.            codecs.backslashreplace_errors,
  440.            42
  441.         )
  442.         # "backslashreplace" complains about the wrong exception types
  443.         self.assertRaises(
  444.            TypeError,
  445.            codecs.backslashreplace_errors,
  446.            UnicodeError("ouch")
  447.         )
  448.         # "backslashreplace" can only be used for encoding
  449.         self.assertRaises(
  450.             TypeError,
  451.             codecs.backslashreplace_errors,
  452.             UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")
  453.         )
  454.         self.assertRaises(
  455.             TypeError,
  456.             codecs.backslashreplace_errors,
  457.             UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
  458.         )
  459.         # Use the correct exception
  460.         self.assertEquals(
  461.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
  462.             (u"\\u3042", 1)
  463.         )
  464.         self.assertEquals(
  465.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
  466.             (u"\\x00", 1)
  467.         )
  468.         self.assertEquals(
  469.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
  470.             (u"\\xff", 1)
  471.         )
  472.         self.assertEquals(
  473.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
  474.             (u"\\u0100", 1)
  475.         )
  476.         self.assertEquals(
  477.             codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
  478.             (u"\\uffff", 1)
  479.         )
  480.         if sys.maxunicode>0xffff:
  481.             self.assertEquals(
  482.                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
  483.                 (u"\\U00010000", 1)
  484.             )
  485.             self.assertEquals(
  486.                 codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
  487.                 (u"\\U0010ffff", 1)
  488.             )
  489.  
  490.     def test_badhandlerresults(self):
  491.         results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
  492.         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
  493.  
  494.         for res in results:
  495.             codecs.register_error("test.badhandler", lambda: res)
  496.             for enc in encs:
  497.                 self.assertRaises(
  498.                     TypeError,
  499.                     u"\u3042".encode,
  500.                     enc,
  501.                     "test.badhandler"
  502.                 )
  503.             for (enc, bytes) in (
  504.                 ("ascii", "\xff"),
  505.                 ("utf-8", "\xff"),
  506.                 ("utf-7", "+x-")
  507.             ):
  508.                 self.assertRaises(
  509.                     TypeError,
  510.                     bytes.decode,
  511.                     enc,
  512.                     "test.badhandler"
  513.                 )
  514.  
  515.     def test_lookup(self):
  516.         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
  517.         self.assertEquals(codecs.ignore_errors, codecs.lookup_error("ignore"))
  518.         self.assertEquals(codecs.strict_errors, codecs.lookup_error("strict"))
  519.         self.assertEquals(
  520.             codecs.xmlcharrefreplace_errors,
  521.             codecs.lookup_error("xmlcharrefreplace")
  522.         )
  523.         self.assertEquals(
  524.             codecs.backslashreplace_errors,
  525.             codecs.lookup_error("backslashreplace")
  526.         )
  527.  
  528.     def test_unencodablereplacement(self):
  529.         def unencrepl(exc):
  530.             if isinstance(exc, UnicodeEncodeError):
  531.                 return (u"\u4242", exc.end)
  532.             else:
  533.                 raise TypeError("don't know how to handle %r" % exc)
  534.         codecs.register_error("test.unencreplhandler", unencrepl)
  535.         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
  536.             self.assertRaises(
  537.                 UnicodeEncodeError,
  538.                 u"\u4242".encode,
  539.                 enc,
  540.                 "test.unencreplhandler"
  541.             )
  542.  
  543.     def test_badregistercall(self):
  544.         # enhance coverage of:
  545.         # Modules/_codecsmodule.c::register_error()
  546.         # Python/codecs.c::PyCodec_RegisterError()
  547.         self.assertRaises(TypeError, codecs.register_error, 42)
  548.         self.assertRaises(TypeError, codecs.register_error, "test.dummy", 42)
  549.  
    def test_unknownhandler(self):
        # enhance coverage of:
        # Modules/_codecsmodule.c::lookup_error()
        # Looking up a handler name that was never registered must raise
        # LookupError.
        self.assertRaises(LookupError, codecs.lookup_error, "test.unknown")
  554.  
  555.     def test_xmlcharrefvalues(self):
  556.         # enhance coverage of:
  557.         # Python/codecs.c::PyCodec_XMLCharRefReplaceErrors()
  558.         # and inline implementations
  559.         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
  560.         if sys.maxunicode>=100000:
  561.             v += (100000, 500000, 1000000)
  562.         s = u"".join([unichr(x) for x in v])
  563.         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
  564.         for enc in ("ascii", "iso-8859-15"):
  565.             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
  566.                 s.encode(enc, err)
  567.  
    def test_decodehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_decode_call_errorhandler()
        # and callers

        # An unregistered handler name surfaces as LookupError.
        self.assertRaises(LookupError, "\xff".decode, "ascii", "test.unknown")

        # A handler that returns a non-tuple must cause TypeError in every
        # decoder exercised here.
        def baddecodereturn1(exc):
            return 42
        codecs.register_error("test.baddecodereturn1", baddecodereturn1)
        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\x0".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\x0y".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\Uffffeeee".decode, "unicode-escape", "test.baddecodereturn1")
        self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")

        # A tuple whose position is not an integer is rejected as well.
        def baddecodereturn2(exc):
            return (u"?", None)
        codecs.register_error("test.baddecodereturn2", baddecodereturn2)
        self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")

        # PosReturn returns whatever position is stored in handler.pos, so
        # the cases below probe how the decoder treats the resume position.
        handler = PosReturn()
        codecs.register_error("test.posreturn", handler.handle)

        # Valid negative position
        handler.pos = -1
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")

        # Valid negative position
        # (-2 points back at the bad byte, so the handler is called a second
        # time; PosReturn then returns the end of the input.)
        handler.pos = -2
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")

        # Negative position out of bounds
        handler.pos = -3
        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")

        # Valid positive position
        handler.pos = 1
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")

        # Largest valid positive position (one beyond end of input)
        handler.pos = 2
        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")

        # Invalid positive position
        handler.pos = 3
        self.assertRaises(IndexError, "\xff0".decode, "ascii", "test.posreturn")

        # Restart at the "0"
        handler.pos = 6
        self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")

        # Charmap decoding: a None mapping is an ordinary decode error, an
        # exception raised by the mapping's __getitem__ propagates, and a
        # mapped value above sys.maxunicode is a TypeError.
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        self.assertRaises(UnicodeError, codecs.charmap_decode, "\xff", "strict", {0xff: None})
        self.assertRaises(ValueError, codecs.charmap_decode, "\xff", "strict", D())
        self.assertRaises(TypeError, codecs.charmap_decode, "\xff", "strict", {0xff: sys.maxunicode+1})
  626.  
    def test_encodehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and callers

        # An unregistered handler name surfaces as LookupError.
        self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")

        # A handler that returns a non-tuple must cause TypeError.
        def badencodereturn1(exc):
            return 42
        codecs.register_error("test.badencodereturn1", badencodereturn1)
        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")

        # A tuple whose position is not an integer is rejected as well.
        def badencodereturn2(exc):
            return (u"?", None)
        codecs.register_error("test.badencodereturn2", badencodereturn2)
        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")

        # PosReturn returns whatever position is stored in handler.pos, so
        # the cases below probe how the encoder treats the resume position.
        handler = PosReturn()
        codecs.register_error("test.posreturn", handler.handle)

        # Valid negative position
        handler.pos = -1
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")

        # Valid negative position
        # (-2 points back at the bad character, so the handler is called a
        # second time; PosReturn then returns the end of the input.)
        handler.pos = -2
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")

        # Negative position out of bounds
        handler.pos = -3
        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")

        # Valid positive position
        handler.pos = 1
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")

        # Largest valid positive position (one beyond end of input)
        handler.pos = 2
        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")

        # Invalid positive position
        handler.pos = 3
        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")

        # Reset the position before "test.posreturn" is used in the charmap
        # loop below.
        handler.pos = 0

        # Charmap encoding, for every handler: a None mapping is an encode
        # error, an exception raised by the mapping's __getitem__
        # propagates, and a mapped integer of 300 is a TypeError.
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
            self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
            self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
            self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
  679.  
    def test_translatehelper(self):
        # enhance coverage of:
        # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
        # and callers
        # (Unfortunately the errors argument is not directly accessible
        # from Python, so we can't test that much)
        # NOTE(review): the helper named above is the *encode* one; the
        # translate code path presumably has its own helper -- confirm.

        # An exception raised by the mapping's __getitem__ propagates; a
        # mapped value above sys.maxunicode or of an unsupported type (a
        # tuple) is a TypeError.
        class D(dict):
            def __getitem__(self, key):
                raise ValueError
        self.assertRaises(ValueError, u"\xff".translate, D())
        self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
        self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
  692.  
  693.     def test_bug828737(self):
  694.         charmap = {
  695.             ord("&"): u"&",
  696.             ord("<"): u"<",
  697.             ord(">"): u">",
  698.             ord('"'): u""",
  699.         }
  700.         
  701.         for n in (1, 10, 100, 1000):
  702.             text = u'abc<def>ghi'*n
  703.             text.translate(charmap)
  704.  
def test_main():
    # Entry point: run CodecCallbackTest through test_support's runner.
    test.test_support.run_unittest(CodecCallbackTest)
  707.  
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()
  710.